
from bs4 import BeautifulSoup
import requests
import csv
import pymysql
import re
from datetime import datetime

'''
Scrape vegetable-price listings from the Beijing Xinfadi market site with
BeautifulSoup and insert each row into the MySQL table `cj`.
(A CSV export can be added the same way via the `csv` module if needed.)
'''

# Open the database connection once; it is reused for every page.
db = pymysql.connect(host='localhost',
        user='root',
        password='123456',
        db='py-db',
        charset='utf8mb4')

# Parameterized INSERT: pymysql quotes and escapes every value itself,
# which prevents SQL injection and the data corruption the previous
# re.escape() approach caused (re.escape targets regex metacharacters,
# not SQL — it inserted stray backslashes into the stored text).
INSERT_SQL = """INSERT INTO cj 
         (c_cai_name, c_min_money, c_avg_money, c_max_money,c_company,c_specs,c_release_date) 
         VALUES
         (%s,%s,%s,%s,%s,%s,%s)"""

try:
    # Beijing Xinfadi price list, one HTTP request per page.
    for page in range(1, 100):
        print("当前页为：" + str(page))
        xfd_url = f'http://www.xinfadi.com.cn/marketanalysis/0/list/{page}.shtml'
        resp = requests.get(xfd_url)
        try:
            # Hand the HTML page to BeautifulSoup for parsing.
            soup = BeautifulSoup(resp.text, "html.parser")
            # find() returns the first match; class_ targets the CSS class
            # (attrs={"class": ...} would work too).
            cj_table = soup.find("table", class_="hq_table")
            if cj_table is None:
                # Page layout changed or page is empty — skip rather than crash.
                continue
            # Skip the header row; each remaining <tr> is one price record.
            rows = cj_table.find_all("tr")[1:]
            # One cursor per page, closed automatically by the context manager.
            with db.cursor() as cursor:
                for row in rows:
                    tds = row.find_all("td")
                    if len(tds) < 7:
                        continue  # malformed row — not enough columns
                    name = tds[0].text.strip()        # text content of the cell
                    min_money = tds[1].text.strip()
                    avg_money = tds[2].text.strip()
                    max_money = tds[3].text.strip()
                    specs = tds[4].text.strip()
                    company = tds[5].text.strip()
                    release_date = tds[6].text.strip()
                    # Validate/normalize the date, e.g. '2021-05-07'.
                    d = datetime.strptime(release_date, '%Y-%m-%d')
                    print(name, min_money, avg_money, max_money, company, specs, d)
                    cursor.execute(INSERT_SQL,
                                   (name, min_money, avg_money, max_money,
                                    company, specs, d))
            # Commit once per page instead of once per row.
            db.commit()
        finally:
            # Close every response, not just the last one of the loop.
            resp.close()
finally:
    db.close()


